Overview/Goals

Using time-course RNASeq data from healthy Cflo forager heads:

  • build a circadian gene co-expression network (GCN),
  • annotate the network using published data,

Step 1: Build circadian GCN

1.1 Load data

Dataset: Healthy (uninfected, uninjected) Cflo forager ant heads (three pooled per time point for RNA-extraction and -sequencing), collected every 2h, over a 24h-period. [Control]

# Specify the path to TC5 repo
path_to_tc5_repo = "/Users/biplabendudas/Documents/GitHub/Das_et_al_2021"
# Load the TC5 database
tc5.db <- dbConnect(RSQLite::SQLite(), paste0(path_to_tc5_repo,"/data/TC5_data.db"))

# loading database which contains gene X sample expression data 
data.db <- dbConnect(RSQLite::SQLite(), paste0(path_to_repo,"/data/databases/TC7_data.db"))

# specify sample name
sample.name <- c("cflo_control","cflo_ophio-infected","cflo_beau-infected")

# extract the (gene-expr X time-point-of-sampling) data
dat <-
  data.db %>%
  tbl(., paste0(sample.name[1] ,"_fpkm")) %>%
  select(gene_name, everything()) %>%
  collect()

writeLines("What is the dimensions of the original dataset? [Rows = #genes, Cols = #samples]")
## What is the dimensions of the original dataset? [Rows = #genes, Cols = #samples]
dim(dat[-1])
## [1] 13808    12

1.2 Clean data

The above dataset contains all genes (n=13,808) in the ant genome. However, not all of these genes are expressed in the ant head, and some are expressed at very low levels that are not biologically meaningful. Therefore, we will only keep the genes that are “expressed” (≥1 FPKM for at least half of all time points) in the ant head.

# Which genes are expressed throughout the day in forager heads?
  # count the number of time points that has ≥ 1 FPKM
  n.expressed <- apply(dat[-1], 1, function(x) sum(x >= 1))
  # subset the data and only keep the filtered genes
  dat <- dat[which(n.expressed >=6),]

writeLines("Dimensions of the data post-filtering step [Rows = #genes, Cols = #samples]")
## Dimensions of the data post-filtering step [Rows = #genes, Cols = #samples]
dim(dat)
## [1] 9591   13

This is our cleaned, input data file for building the circadian GCN.

1.3 Format data

  • Log2 transform the data
datExpr = as.data.frame(t(log2(dat[-c(1)]+1)))
names(datExpr) = dat$gene_name
rownames(datExpr) = names(dat)[-c(1)]

# USE THE FOLLOWING CODE TO CHECK IF YOU HAVE ANY BAD SAMPLES #
  # gsg = goodSamplesGenes(datExpr, verbose = 3);
  # gsg$allOK

  # sampleTree = hclust(dist(datExpr0), method = "average");
  # # Plot the sample tree: Open a graphic output window of size 12 by 9 inches
  # # The user should change the dimensions if the window is too large or too small.
  # sizeGrWindow(12,9)
  # #pdf(file = "Plots/sampleClustering.pdf", width = 12, height = 9);
  # par(cex = 1);
  # par(mar = c(0,4,2,0))
  # plot(sampleTree, main = "Sample clustering to detect outliers", sub="", xlab="", cex.lab = 1.5,
  #      cex.axis = 1.5, cex.main = 2)

# save the number of genes and samples
# that will be used to create the circadian GCN
nGenes = ncol(datExpr)
nSamples = nrow(datExpr)

# visualize the log-transformed data
x = reshape2::melt(as.matrix(t(datExpr)))
colnames(x) = c('gene_id', 'sample', 'value')

writeLines("Visualizing the log-transformed data")
## Visualizing the log-transformed data
ggplot(x, aes(x=value, color=sample)) + geom_density() + theme_Publication()

1.4 Calculate gene-gene similarity

# Calculate Kendall's tau-b correlation for each gene-gene pair

# sim_matrix <- cor((datExpr), method = "kendall") # this step takes time
# save(sim_matrix, file = paste0(path_to_repo, "/results/temp_files/sim_matrix_", sample.name[1], "_TC7.RData")) # might be useful to save the sim_matrix and
load(paste0(path_to_repo, "/results/temp_files/sim_matrix_", sample.name[1], "_TC7.RData")) # load it up

## Let's display a chunk of the matrix (code from Hughitt 2016; github)
heatmap_indices <- sample(nrow(sim_matrix), 200)

writeLines(paste0("Plotting a chunk of the gene-gene similarity matrix with ", length(heatmap_indices), " genes."))
## Plotting a chunk of the gene-gene similarity matrix with 200 genes.
gplots::heatmap.2(t(sim_matrix[heatmap_indices, heatmap_indices]),
          col=inferno(100),
          labRow=NA, labCol=NA,
          trace='none', dendrogram='row',
          xlab='Gene', ylab='Gene',
          main= paste0("Similarity matrix \n correlation method = 'kendall' \n (", length(heatmap_indices), " random genes)"),
          density.info='none', revC=TRUE)

1.5 Create adjacency matrix

  • To create the adjacency matrix, we need to first identify the soft-thresholding power (see WGCNA for more info).
writeLines("Performing network topology analysis to pick soft-thresholding power")
## Performing network topology analysis to pick soft-thresholding power
# Choose a set of soft-thresholding powers
powers = c(c(1:10), seq(from = 12, to=30, by=2))
# # Call the network topology analysis function
sft = pickSoftThreshold(datExpr, powerVector = powers, verbose = 5)
## pickSoftThreshold: will use block size 4664.
##  pickSoftThreshold: calculating connectivity for given powers...
##    ..working on genes 1 through 4664 of 9591
##    ..working on genes 4665 through 9328 of 9591
##    ..working on genes 9329 through 9591 of 9591
##    Power SFT.R.sq  slope truncated.R.sq  mean.k. median.k.  max.k.
## 1      1  0.00856  0.674          0.965 2610.000  2610.000 3410.00
## 2      2  0.07980 -1.170          0.964 1060.000  1040.000 1720.00
## 3      3  0.27300 -1.790          0.964  516.000   501.000 1020.00
## 4      4  0.44700 -2.180          0.965  283.000   269.000  661.00
## 5      5  0.57400 -2.350          0.976  169.000   157.000  457.00
## 6      6  0.65000 -2.420          0.981  107.000    96.800  330.00
## 7      7  0.70700 -2.480          0.986   70.800    62.500  246.00
## 8      8  0.74900 -2.490          0.991   48.800    42.000  189.00
## 9      9  0.77700 -2.520          0.991   34.600    29.100  149.00
## 10    10  0.80000 -2.550          0.993   25.200    20.700  119.00
## 11    12  0.83400 -2.560          0.992   14.300    11.100   79.80
## 12    14  0.84800 -2.600          0.988    8.640     6.450   55.80
## 13    16  0.86700 -2.580          0.990    5.510     3.930   40.40
## 14    18  0.87300 -2.580          0.992    3.660     2.500   30.00
## 15    20  0.88600 -2.550          0.994    2.520     1.660   22.80
## 16    22  0.89500 -2.510          0.995    1.790     1.130   17.70
## 17    24  0.89800 -2.490          0.993    1.300     0.795   13.90
## 18    26  0.90400 -2.460          0.994    0.964     0.570   11.10
## 19    28  0.90200 -2.440          0.992    0.729     0.417    8.94
## 20    30  0.90700 -2.410          0.993    0.560     0.309    7.29
writeLines("Plotting the resutls from the network topology analysis")
## Plotting the resutls from the network topology analysis
# Plot the results:
# sizeGrWindow(9, 5)
par(mfrow = c(1,2));
cex1 = 0.9;
# Scale-free topology fit index as a function of the soft-thresholding power
plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
     xlab="Soft Threshold (power)",ylab="Scale Free Topology Model Fit,signed R^2",type="n",
     main = paste("Scale independence"));
text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
     labels=powers,cex=cex1,col="red");
# this line corresponds to using an R^2 cut-off of h
abline(h=0.90,col="red")
# Mean connectivity as a function of the soft-thresholding power
plot(sft$fitIndices[,1], sft$fitIndices[,5],
     xlab="Soft Threshold (power)",ylab="Mean Connectivity", type="n",
     main = paste("Mean connectivity"))
text(sft$fitIndices[,1], sft$fitIndices[,5], labels=powers, cex=cex1,col="red")

NOTE: The scale-free topology fit index reaches ~0.85 at a soft-thresholding-power=12, and it does not improve drastically beyond that.

Now, we can go ahead and create our adjacency matrix by power-transforming the similarity matrix (see WGCNA for more info).

## Specify the soft-thresholding-power
soft.power = 12

# # Construct adjacency matrix
# adj_matrix <- adjacency.fromSimilarity(sim_matrix,
#                                        power=soft.power,
#                                        type='signed'
#                                         )
# save(adj_matrix, file = paste0(path_to_repo, "/results/temp_files/adj_matrix_", sample.name[1], "_TC7.RData")) # might be useful to save the sim_matrix and
load(paste0(path_to_repo, "/results/temp_files/adj_matrix_", sample.name[1], "_TC7.RData")) # load it up


# Convert adj_matrix to matrix
gene_ids <- rownames(adj_matrix)

adj_matrix <- matrix(adj_matrix, nrow=nrow(adj_matrix))
rownames(adj_matrix) <- gene_ids
colnames(adj_matrix) <- gene_ids

writeLines(paste0("Plotting the power-transformed adjacency matrix for the same ", length(heatmap_indices)," genes as above"))
## Plotting the power-transformed adjacency matrix for the same 200 genes as above
## Same heatmap as before, but now with the power-transformed adjacency matrix
gplots::heatmap.2(t(adj_matrix[heatmap_indices, heatmap_indices]),
                  col=inferno(100),
                  labRow=NA, labCol=NA,
                  trace='none', dendrogram='row',
                  xlab='Gene', ylab='Gene',
                  main='Adjacency matrix',
                  density.info='none', revC=TRUE)

## Delete similarity matrix to free up memory
rm(sim_matrix)
# gc()

Step 2: Identify gene clusters

The following steps are performed as per guidelines from the WGCNA package and several tutorials made available online.

2.1 Create topological overalp matrix

# # Turn adjacency into topological overlap
# TOM = TOMsimilarity(adj_matrix);
# dissTOM = 1-TOM
# save(dissTOM, file = paste0(path_to_repo, "/results/temp_files/dissTOM_", sample.name[1], "_TC7.RData")) # might be useful to save the sim_matrix and
load(paste0(path_to_repo, "/results/temp_files/dissTOM_", sample.name[1], "_TC7.RData")) # load it up

# Call the hierarchical clustering function
geneTree = hclust(as.dist(dissTOM), method = "average")

writeLines("Plotting the resulting clustering tree (dendrogram)")
## Plotting the resulting clustering tree (dendrogram)
# sizeGrWindow(12,9)
# reset plotting parameter
par(mfrow = c(1,1))
plot(geneTree, xlab="", sub="", main = "Gene clustering on TOM-based dissimilarity",
     labels = FALSE, hang = 0.04)

2.2 Identify clusters

User defined parameters:

  • minimum size (number of genes) of modules | var-name: minModuleSize
# We like large modules, so we set the minimum module size relatively high:
minModuleSize = 50;

# Module identification using dynamic tree cut:
dynamicMods= cutreeDynamic(dendro = geneTree,
                           distM = dissTOM,
                           method = "hybrid",
                           verbose = 4,
                           deepSplit = 3, # see WGCNA for more info on tuning parameters
                           pamRespectsDendro = FALSE,
                           minClusterSize = minModuleSize);
##  ..cutHeight not given, setting it to 0.997  ===>  99% of the (truncated) height range in dendro.
##  ..Going through the merge tree
##  
##  ..Going through detected branches and marking clusters..
##  ..Assigning Tree Cut stage labels..
##  ..Assigning PAM stage labels..
##  ....assigned 2501 objects to existing clusters.
##  ..done.
# view number of genes in each module
# table(dynamicMods)

writeLines("How many genes are there in each of the initial modules (clusters) detected?
Note: The names of the modules (colors) have no meaning.")
## How many genes are there in each of the initial modules (clusters) detected?
## Note: The names of the modules (colors) have no meaning.
# Convert numeric lables into colors
dynamicColors = labels2colors(dynamicMods)
table(dynamicColors)
## dynamicColors
##   antiquewhite4         bisque4           black            blue           blue2 
##              65              87             212             382              50 
##           brown          brown2          brown4          coral1          coral2 
##             338              51              87              68              64 
##            cyan       darkgreen        darkgrey     darkmagenta  darkolivegreen 
##             191             155             135             118             118 
## darkolivegreen4      darkorange     darkorange2         darkred   darkseagreen4 
##              53             131              90             156              70 
##   darkslateblue   darkturquoise      firebrick4     floralwhite           green 
##              86             135              54              93             258 
##     greenyellow          grey60       honeydew1      indianred4           ivory 
##             198             170              71              55              94 
##  lavenderblush3      lightcoral       lightcyan      lightcyan1      lightgreen 
##              72              56             171              97             168 
##      lightpink4  lightsteelblue lightsteelblue1     lightyellow         magenta 
##              72              58              98             167             207 
##          maroon    mediumorchid   mediumpurple2   mediumpurple3    midnightblue 
##              75              64              59             100             188 
##    navajowhite2          orange      orangered3      orangered4   paleturquoise 
##              75             132              59             103             118 
##  palevioletred3            pink            plum           plum1           plum2 
##              76             211              62             107              82 
##          purple             red       royalblue     saddlebrown          salmon 
##             201             214             162             123             191 
##         salmon4         sienna3         skyblue        skyblue1        skyblue2 
##              76             110             129              63              64 
##        skyblue3       steelblue             tan        thistle1        thistle2 
##             108             122             196              80              80 
##       turquoise          violet           white          yellow         yellow4 
##             426             118             131             313              63 
##     yellowgreen 
##             109

2.3 Merge similar modules

User defined parameters:

  • minimum correlation between two modules above which they are merged into one | var-name: MEDissThres
# Calculate eigengenes
MEList = moduleEigengenes(datExpr, colors = dynamicColors)
MEs = MEList$eigengenes

# Calculate dissimilarity of module eigengenes
MEDiss = 1-cor(MEs, method = "kendall");

writeLines("Clustering the module eigengenes and identifying a cutoff to merge similar modules")
## Clustering the module eigengenes and identifying a cutoff to merge similar modules
# Cluster module eigengenes
METree = hclust(as.dist(MEDiss), method = "average");
# Plot the result
# sizeGrWindow(7, 8)
plot(METree, main = "Clustering of module eigengenes",
     xlab = "", sub = "MEDiss = 1-cor(MEs, method = 'kendall')")

# We choose a height cut of 0.4, corresponding to correlation of 0.6, to merge
MEDissThres = 0.4 # user-specified parameter value; see WGCNA manual for more info

# Plot the cut line into the dendrogram
abline(h=MEDissThres, col = "red")

writeLines(paste0("Merging modules that have a correlation ≥ ", 1-MEDissThres))
## Merging modules that have a correlation ≥ 0.6
# Call an automatic merging function
merge = mergeCloseModules(datExpr, dynamicColors, cutHeight = MEDissThres, verbose = 3)
##  mergeCloseModules: Merging modules whose distance is less than 0.4
##    multiSetMEs: Calculating module MEs.
##      Working on set 1 ...
##      moduleEigengenes: Calculating 76 module eigengenes in given set.
##    multiSetMEs: Calculating module MEs.
##      Working on set 1 ...
##      moduleEigengenes: Calculating 33 module eigengenes in given set.
##    multiSetMEs: Calculating module MEs.
##      Working on set 1 ...
##      moduleEigengenes: Calculating 24 module eigengenes in given set.
##    multiSetMEs: Calculating module MEs.
##      Working on set 1 ...
##      moduleEigengenes: Calculating 22 module eigengenes in given set.
##    Calculating new MEs...
##    multiSetMEs: Calculating module MEs.
##      Working on set 1 ...
##      moduleEigengenes: Calculating 22 module eigengenes in given set.
# The merged module colors
mergedColors = merge$colors;
# Eigengenes of the new merged modules:
mergedMEs = merge$newMEs;

writeLines("Plotting the identified clusters (denoted with colors) before and after merging.")
## Plotting the identified clusters (denoted with colors) before and after merging.
# sizeGrWindow(12, 9)
plotDendroAndColors(geneTree,
                    cbind(dynamicColors, mergedColors),
                    c("Dynamic Tree Cut", "Merged dynamic"),
                    dendroLabels = FALSE, hang = 0.03,
                    addGuide = TRUE, guideHang = 0.05)

# Rename to moduleColors
moduleColors = mergedColors

# Construct numerical labels corresponding to the colors
colorOrder = c("grey", standardColors(50));
moduleLabels = match(moduleColors, colorOrder)-1

2.4 Calculate module-module similarity

writeLines("Calculating module-module similarity based on module-eigengene-expression.")
## Calculating module-module similarity based on module-eigengene-expression.
# Calculate similarity of the eigen-genes
sim_matrix_ME <- cor(mergedMEs, method = "kendall")

# calculate adj_matrix
adj_matrix_ME <- adjacency.fromSimilarity(sim_matrix_ME,
                                          power=1, # DO NOT power transform
                                          type='signed'
)

## CHANGE THE NAMES OF THE MODULES;
module_ids <- data.frame(
  old_labels = rownames(adj_matrix_ME) %>% str_split("ME", 2) %>% sapply("[", 2) %>% as.character(),
  new_labels = paste0("C", 1:nrow(adj_matrix_ME)))
# and coerce into matrix
adj_matrix_ME <- matrix(adj_matrix_ME, nrow=nrow(adj_matrix_ME))
rownames(adj_matrix_ME) <- module_ids$new_labels
colnames(adj_matrix_ME) <- module_ids$new_labels

# ## KEEP THE SAME MODULE NAMES (named by color)
# gene_ids <- rownames(adj_matrix_ME)
# # coerce into a matrix
# adj_matrix_ME <- matrix(adj_matrix_ME, nrow=nrow(adj_matrix_ME))
# rownames(adj_matrix_ME) <- gene_ids
# colnames(adj_matrix_ME) <- gene_ids

writeLines("Plotting the adjacency matrix that shows module-module similarity in expression")
## Plotting the adjacency matrix that shows module-module similarity in expression
gplots::heatmap.2(t(adj_matrix_ME),
                  col=inferno(100),
                  # labRow=NA, labCol=NA,
                  trace='none', dendrogram='row',
                  xlab='', ylab='',
                  # main='Similarity matrix - MEs \n correlation method = "kendall")',
                  main='Adjacency matrix - MEs \n (module-module similarity)',
                  density.info='none', revC=TRUE)

2.5 Visualize the network

pacman::p_load(igraph)

adj_matrix_ME_igraph <- adj_matrix_ME

# get rid of low correlations (0.6 & 0.8 are arbitrary) [0.7 and 0.9]
adj_matrix_ME_igraph[adj_matrix_ME_igraph < 0.6] <- 0
adj_matrix_ME_igraph[adj_matrix_ME_igraph < 0.8 & adj_matrix_ME_igraph>0] <- 0.5
adj_matrix_ME_igraph[adj_matrix_ME_igraph >= 0.9] <- 1

# build_network
network <- graph.adjacency(adj_matrix_ME_igraph,
                           mode = "upper",
                           weighted = T,
                           diag = F)

# simplify network
network <- igraph::simplify(network)  # removes self-loops

# remove isolated vertices (keep only the nodes)
isolated <- which(degree(network)==0)
network <- igraph::delete.vertices(network, isolated)

# E(network)$width <- E(network)$weight + min(E(network)$weight) + 1 # offset=1

# colors <- as.character(module_ids$old_labels)
# V(network)$color <- colors
V(network)$color <- "white"

# genes_ME <- factor(moduleColors, levels=colors) %>% summary()
V(network)$size <- igraph::degree(network, mode = "all")*3
# V(network)$size <- log2(genes_ME)^1.3

V(network)$label.color <- "black"
V(network)$frame.color <- "black"

E(network)$width <- E(network)$weight^2*4
E(network)$color <- "black"

# ## highlight shortest paths between two vetices
# short.path <- igraph::get.shortest.paths(network, "S_5", "S_15")
# E(network, path = unlist(short.path[[1]]))$color <- col.scheme[2]
# E(network, path = unlist(short.path[[1]]))$width <- E(network)$weight*8

writeLines("Visualizing a simplified representation of the circadian GCN, with and without labels")
## Visualizing a simplified representation of the circadian GCN, with and without labels
par(mfrow = c(1,1))

## Circular layout
# png(paste0(path_to_repo, "/results/figures/", sample.name[1], "/", script.name,"/", sample.name[1],"_GCN_1.png"),
#     width = 20, height = 30, units = "cm", res = 1000)
# par(bg=NA)
# plot(network,
#      layout=layout.kamada.kawai,
#        # layout=layout.fruchterman.reingold,
#        # layout=layout.graphopt,
#        # layout=layout_in_circle,
#      vertex.label=NA
#      # vertex.size=hub.score(network)$vector*30
#      # vertex.shape="none"
# )
# dev.off()

# png(paste0(path_to_repo, "/results/figures/", sample.name[1], "/", script.name,"/", sample.name[1],"_GCN_2.png"),
#     width = 20, height = 30, units = "cm", res = 600)
# par(bg=NA)
plot(network,
     size=20,
     layout=layout.kamada.kawai,
       # layout=layout.fruchterman.reingold
       # layout=layout.graphopt
       # layout=layout_in_circle,
     # vertex.label=NA
     # vertex.size=hub.score(network)$vector*30
     vertex.shape="none"
)

# dev.off()
par(mfrow = c(1,1))

Step 3: Annotate the network

list of module x genes

# Make a list that returns gene names for a given cluster
module_genes <- list()

modules.to.exclude <- c("")
# modules.to.exclude <- c(paste0("C",c(2,5,6,7,10:17,19)))
which.modules <- module_ids %>% filter(!new_labels %in% modules.to.exclude) %>% pull(old_labels)
which.labels <- module_ids %>% filter(!new_labels %in% modules.to.exclude) %>% pull(new_labels)

# Get the genes from each of the modules
for (i in 1:length(which.modules)) {
  
  # which color
  mod.color = as.character(which.modules[[i]])
  
  # subset
  module_genes[[i]] <- names(datExpr)[which(moduleColors==mod.color)]
  names(module_genes)[[i]] <- as.character(which.labels[[i]])
}
# # check the result | works
# names(module_genes)
# module_genes['C22']

# [13 Dec 2021]
# Save a csv with the module identity information for all genes used in building the GCN
# make a dataframe with gene_name and module_identity
for (i in 1:length(module_genes)){
  if (i == 1){
    cflo.control.mods <- data.frame(gene_name = module_genes[[i]],
                                    module_identity = as.character(names(module_genes)[i]))
  }
  else{
   foo <- data.frame(gene_name = module_genes[[i]],
                                    module_identity = as.character(names(module_genes)[i]))
    cflo.control.mods <- rbind(cflo.control.mods, foo) 
  }
  
}

# # save the dataframe as a csv
# cflo.control.mods %>% 
#   left_join(module_ids, by = c("module_identity" = "new_labels")) %>% 
#   write.csv(.,
#             paste0(path_to_repo,
#                    "/results/WGCNA/cflo/cflo_heads_control_module_identity_new_labels.csv"),
#             row.names = F)
# # done.

# # save a copy with all the gene annotations
# # load Cflo gene annotations
# cflo_annots <- read.csv(paste0(path_to_repo,"/functions/func_data/cflo_annots.csv"),
#                             header=T, stringsAsFactors = F, na.strings = c(NA,""," "))
# cflo.control.mods %>%
#   left_join(cflo_annots, by="gene_name") %>% head()
#   write.csv(.,
#             paste0(path_to_repo,
#                    "/results/WGCNA/cflo/cflo_heads_control_module_identity_new_labels_annots.csv"),
#             row.names = F)

3.1 Load genes of interest

# load the list of all genes of interest
load(file = paste0(path_to_repo,"/results/genes_of_interest/goi_list.RData"))

# sapply(goi.list, class)

3.4 Where are my genes of interest located?

make your gene sets (character vectors)

## Rhythmic genes
rhy24.control <- goi.list[[1]][[1]][[1]]
rhy24.ocflo <- goi.list[[1]][[2]][[1]]
rhy24.beau <- goi.list[[1]][[3]][[1]]

## Ultradian genes
rhy12.control <- goi.list[[1]][[1]][[2]]
rhy12.ocflo <- goi.list[[1]][[2]][[2]]
rhy12.beau <- goi.list[[1]][[3]][[2]]
rhy08.control <- goi.list[[1]][[1]][[3]]
rhy08.ocflo <- goi.list[[1]][[2]][[3]]
rhy08.beau <- goi.list[[1]][[3]][[3]]

## DRGs
for.brain.head.rhy24.cluster1 <- goi.list[[2]]
for24.nur8 <- goi.list[[6]][[1]]

## DEGs
ocflo.up <- goi.list[[3]][[1]]
ocflo.down <- goi.list[[3]][[2]]
beau.up <- goi.list[[4]][[1]]
beau.down <- goi.list[[4]][[2]]
for.up <- goi.list[[5]][[1]]
for.down <- goi.list[[5]][[2]]

# ###-###-###-###
#   deg.dat <- readRDS(file = paste0(path_to_repo,"/results/cflo/","cflo_inf_v_control_DEGs.Rds"))
#   ophio.down <- deg.dat[[1]] %>% filter(inf_v_control == "down") %>% pull(gene_name)
#   beau.up <- deg.dat[[2]] %>% filter(inf_v_control == "up") %>% pull(gene_name)
# ###-###-###-###
# ###-###-###-###
#   deg.dat <- readRDS(file = paste0(path_to_repo,"/results/cflo/","cflo_inf_v_control_DEGs.Rds"))
#   ophio.up <- deg.dat[[1]] %>% filter(inf_v_control == "up") %>% pull(gene_name)
#   beau.down <- deg.dat[[2]] %>% filter(inf_v_control == "down") %>% pull(gene_name)
# ###-###-###-###

The_comparison

Full comparison

writeLines("#####################################################
How many genes are in each of my geneset of interest?
#####################################################")
## #####################################################
## How many genes are in each of my geneset of interest?
## #####################################################
## MAKE YOUR LIST OF GENES OF INTEREST ##

# LIST ONE - WGCNA modules
list1 <- module_genes
writeLines("List of interesting genes #1
----------------------------
Genes in each of the identified gene-clusters or modules")
## List of interesting genes #1
## ----------------------------
## Genes in each of the identified gene-clusters or modules
sapply(list1, length)
##   C1   C2   C3   C4   C5   C6   C7   C8   C9  C10  C11  C12  C13  C14  C15  C16 
##  188  135   76   62  110  205  155  313  353   93   72  795   86   55  132   64 
##  C17  C18  C19  C20  C21  C22 
##   50 1363 1184  927 1707 1466
## LIST TWO - rhythmic genes
list2 <- list(rhy24.control,
              rhy24.ocflo,
              rhy24.beau,
              
              rhy12.control,
              rhy12.ocflo,
              rhy12.beau,
              
              rhy08.control,
              rhy08.ocflo,
              rhy08.beau,
              
              # for.brain.head.rhy24.cluster1,
              # for24.nur8,
              
              for.up,
              for.down,
              ocflo.up,
              beau.down,
              ocflo.down,
              beau.up)
writeLines("List of interesting genes #2
----------------------------
Rhythmic genes in control Cflo heads")
## List of interesting genes #2
## ----------------------------
## Rhythmic genes in control Cflo heads
names(list2) <- c("rhy24-controls",
                  "rhy24-Ocflo",
                  "rhy24-Beau",
                  
                  "rhy12-controls",
                  "rhy12-Ocflo",
                  "rhy12-Beau",
                  
                  "rhy08-controls",
                  "rhy08-Ocflo",
                  "rhy08-Beau",
                  
                  # "brain-head-rhy24-cluster1",
                  # "for24-nur8",
                  
                  "for-UP",
                  "for-DOWN",
                  "Ocflo-UP",
                  "Beau-DOWN",
                  "Ocflo-DOWN",
                  "Beau-UP")
sapply(list2, length)
## rhy24-controls    rhy24-Ocflo     rhy24-Beau rhy12-controls    rhy12-Ocflo 
##            852            294            673            548           1152 
##     rhy12-Beau rhy08-controls    rhy08-Ocflo     rhy08-Beau         for-UP 
##            844            881            686           1320             34 
##       for-DOWN       Ocflo-UP      Beau-DOWN     Ocflo-DOWN        Beau-UP 
##             47             81             80            143            139
## CHECK FOR OVERLAP

## make a GOM object
gom.1v2 <- newGOM(list1, list2,
       genome.size = nGenes)
png(paste0(path_to_repo, "/results/figures/", sample.name[1],"_gom_1v2.png"),
    width = 50, height = 30, units = "cm", res = 300)
drawHeatmap(gom.1v2,
              adj.p=T,
              cutoff=0.05,
              what="odds.ratio",
              # what="Jaccard",
              log.scale = T,
              note.col = "grey80")
trash <- dev.off()

 # writeLines("How many genes exactly are overlapping between the pairwise comparisons")
# getMatrix(gom.1v4, name = "intersection") %>% t()

writeLines("Visualizing the significant overlaps between your lists of interesting genes and the identified modules")
## Visualizing the significant overlaps between your lists of interesting genes and the identified modules
Where are my genesets of interests?

Where are my genesets of interests?

Focused comparison

writeLines("#####################################################
How many genes are in each of my geneset of interest?
#####################################################")
## #####################################################
## How many genes are in each of my geneset of interest?
## #####################################################
## MAKE YOUR LIST OF GENES OF INTEREST ##

# LIST ONE - WGCNA modules
list1 <- module_genes
writeLines("List of interesting genes #1
----------------------------
Genes in each of the identified gene-clusters or modules")
## List of interesting genes #1
## ----------------------------
## Genes in each of the identified gene-clusters or modules
sapply(list1, length)
##   C1   C2   C3   C4   C5   C6   C7   C8   C9  C10  C11  C12  C13  C14  C15  C16 
##  188  135   76   62  110  205  155  313  353   93   72  795   86   55  132   64 
##  C17  C18  C19  C20  C21  C22 
##   50 1363 1184  927 1707 1466
## LIST TWO - rhythmic genes
list3 <- list(rhy24.control,
              # rhy24.ocflo,
              # rhy24.beau,
              
              # rhy12.control,
              # rhy12.ocflo,
              # rhy12.beau,
              
              # rhy08.control,
              # rhy08.ocflo,
              # rhy08.beau,
              
              for.brain.head.rhy24.cluster1,
              for24.nur8,
              
              # for.up,
              # for.down,
              ocflo.up,
              beau.down,
              ocflo.down,
              beau.up)
writeLines("List of interesting genes #2
----------------------------
Rhythmic genes in control Cflo heads")
## List of interesting genes #2
## ----------------------------
## Rhythmic genes in control Cflo heads
names(list3) <- c("rhy24-controls",
                  # "rhy24-Ocflo",
                  # "rhy24-Beau",
                  
                  # "rhy12-controls",
                  # "rhy12-Ocflo",
                  # "rhy12-Beau",
                  # 
                  # "rhy08-controls",
                  # "rhy08-Ocflo",
                  # "rhy08-Beau",
                  
                  "brain-head-rhy24-cluster1",
                  "for24-nur8",
                  
                  # "for-UP",
                  # "for-DOWN",
                  "Ocflo-UP",
                  "Beau-DOWN",
                  "Ocflo-DOWN",
                  "Beau-UP")
sapply(list3, length)
##            rhy24-controls brain-head-rhy24-cluster1                for24-nur8 
##                       852                       166                       291 
##                  Ocflo-UP                 Beau-DOWN                Ocflo-DOWN 
##                        81                        80                       143 
##                   Beau-UP 
##                       139
## CHECK FOR OVERLAP

## make a GOM object
gom.1v3 <- newGOM(list1, list3,
       genome.size = nGenes)
png(paste0(path_to_repo, "/results/figures/", sample.name[1],"_gom_1v3.png"),
    width = 20, height =30, units = "cm", res = 300)
drawHeatmap(gom.1v3,
              adj.p=T,
              cutoff=0.01,
              what="odds.ratio",
              # what="Jaccard",
              log.scale = T,
              note.col = "orange")
trash <- dev.off()

 # writeLines("How many genes exactly are overlapping between the pairwise comparisons")
# getMatrix(gom.1v4, name = "intersection") %>% t()

writeLines("Visualizing the significant overlaps between your lists of interesting genes and the identified modules")
## Visualizing the significant overlaps between your lists of interesting genes and the identified modules
Where are my genesets of interests?

Where are my genesets of interests?


MODULE EXPRESSION

# which.modules <- c("C22","C21","C12","C18","C20","C19")
# module.plots <- list()
# for (i in 1: length(which.modules)) {
#   
#   module.plots[[i]] <- 
#     module_genes[[which.modules[[i]]]] %>% 
#     stacked.zplot_tc7() %>% 
#     multi.plot(rows = 3, cols = 1)
#   
# }
# 
# png(paste0(path_to_repo, "/results/figures/", sample.name[1],"module_exp.png"),
#     width = 25, height = 40, units = "cm", res = 400)
# module.plots %>% 
#   multi.plot(rows = 2, cols = 3)
# trash <- dev.off()
# which.modules <- c("C8","C9","C1","C13")
# module.plots <- list()
# for (i in 1: length(which.modules)) {
#   
#   module.plots[[i]] <- 
#     module_genes[[which.modules[[i]]]] %>% 
#     stacked.zplot_tc7() %>% 
#     multi.plot(rows = 3, cols = 1)
#   
# }
# 
# png(paste0(path_to_repo, "/results/figures/", sample.name[1],"module_exp_extra.png"),
#     width = 38, height = 20, units = "cm", res = 400)
# module.plots %>% 
#   multi.plot(rows = 1, cols = 4)
# trash <- dev.off()

Step 5: Network statistics

From WGCNA-tutorial

5.1 Intramodular connectivity

“We begin by calculating the intramodular connectivity for each gene. (In network literature, connectivity is often referred to as”degree”.) The function intramodularConnectivity computes:

  • the whole network connectivity kTotal,
  • the within (intra)module connectivity kWithin,
  • the extra-modular connectivity kOut=kTotal-kWithin, and
  • the difference of the intra- and extra-modular connectivities kDiff=kIn-kOut=2*kIN-kTotal
# From what I can tell, colorh1 in the tutorial refers to moduleColors
colorh1 <- moduleColors

# Calculate the connectivities of the genes
Alldegrees1=intramodularConnectivity(adjMat = adj_matrix, colors = colorh1)
Alldegrees1 <- 
  Alldegrees1 %>% 
    rownames_to_column("gene_name") %>% 
    mutate_at(vars(starts_with("k")), 
              function(x){
                round(x,2)
                })
head(Alldegrees1)
##      gene_name kTotal kWithin  kOut  kDiff
## 1 LOC105247750  65.37   36.53 28.85   7.68
## 2 LOC105247756  16.68   10.06  6.62   3.44
## 3 LOC105247758  14.45    1.25 13.20 -11.95
## 4 LOC105247759  33.05   13.08 19.97  -6.89
## 5 LOC105247760  38.38   27.98 10.40  17.58
## 6 LOC105247762  82.64   41.89 40.76   1.13

5.2 (Module Membership) Generalizing intramodular connectivity for all genes

“The intramodular connectivity measure is only defined for the genes inside a given module. But in practice it can be very important to measure how connected a given genes is to biologically interesting modules. Toward this end, we define a module eigengene-based connectivity measure for each gene as the correlation between a the gene expression and the module eigengene.

Specifically,

kMEbrown(i) = cor(xi,MEbrown)

where xi is the gene expression profile of gene i and MEbrown is the module eigengene of the brown module. Note: that the definition does not require that gene i is a member of the brown module. We define a data frame containing the module membership (MM) values for each module. In the past, we called the module membership values kME."

Calculate the signed kME and display the first few rows/columns.

datKME=signedKME(datExpr, mergedMEs, outputColumnName="")
# Display the first few rows of the data frame
datKME[1:6,1:6]
##              antiquewhite4   darkgrey palevioletred3       plum    sienna3
## LOC105247750  -0.582977966 -0.0292407    -0.53088539 -0.2046492 -0.4422372
## LOC105247756  -0.134736623 -0.1109842     0.08356916 -0.0316794 -0.4873282
## LOC105247758   0.327970208 -0.1750913     0.16217572  0.6597732  0.5777528
## LOC105247759   0.434866568 -0.3683951    -0.43755482  0.3015747  0.3853450
## LOC105247760  -0.005824826  0.3691968     0.25115867 -0.3031710 -0.1560268
## LOC105247762  -0.378285335 -0.1911630    -0.04088718  0.4480581  0.3149604
##                honeydew1
## LOC105247750  0.01705074
## LOC105247756 -0.14405833
## LOC105247758  0.52786885
## LOC105247759  0.68866272
## LOC105247760 -0.75585522
## LOC105247762 -0.21570677

Let’s plot the connectivity vs. module-membership values for all identified clusters.

# colorlevels=as.character(module_ids$old_labels)
# # sizeGrWindow(9,6)
# 
# png(paste0(path_to_repo, "/results/figures/", sample.name[1],"/", script.name, "/",
#            sample.name[1],"_connectivity_kWithin_v_modulemembership.png"), 
#     width = 30, height = 40, units = "cm", res = 300)
# 
# par(mfrow=c(as.integer(0.5+length(colorlevels)/4), 4))
# par(mar = c(3,3,2,2))
# # par(mar = c(4,5,3,1))
# for (i in c(1:length(colorlevels))) {
# whichmodule=colorlevels[[i]];
# restrict1 = (colorh1==whichmodule);
# verboseScatterplot(Alldegrees1$kWithin[restrict1],
#                    datKME[restrict1, whichmodule],
#                     col= rgb(red=169, green=169, blue=169, alpha = 80, maxColorValue = 255),
#                     # bg = colorh1[restrict1],
#                     bg = rgb(red=169, green=169, blue=169, alpha = 30, maxColorValue = 255),
#                     pch = 21,
#                     lwd = 1.5,
#                     cex = 1.2,
#                    
#                     # main=whichmodule,
#                     main=module_ids %>% filter(old_labels==whichmodule) %>% pull(new_labels) %>% as.character(),
#                     xlab = "Connectivity_kWithin", ylab = "Module-membership", abline = TRUE)
# }
# 
# trash <- dev.off()

Plotting the mean (± 95% CI) connectivity of genes in different modules

# 
# # Make the summary function (borrowed)
# summarySE <- function(data=NULL, measurevar, groupvars=NULL, na.rm=FALSE,
#                       conf.interval=.95, .drop=TRUE) {
#   
#   # New version of length which can handle NA's: if na.rm==T, don't count them
#   length2 <- function (x, na.rm=FALSE) {
#     if (na.rm) sum(!is.na(x))
#     else  length(x)
#   }
#   
#   # This does the summary. For each group's data frame, return a vector with
#   # N, mean, and sd
#   datac <- plyr::ddply(data, groupvars, .drop=.drop,
#                        .fun = function(xx, col) {
#                          c(N    = length2(xx[[col]], na.rm=na.rm),
#                            mean = mean(xx[[col]], na.rm=na.rm),
#                            sd   = stats::sd(xx[[col]], na.rm=na.rm)
#                          )
#                        },
#                        measurevar
#   )
#   
#   # Rename the "mean" column    
#   datac <- plyr::rename(datac, c("mean" = measurevar))
#   
#   datac$se <- datac$sd / sqrt(as.numeric(datac$N))  # Calculate standard error of the mean
#   
#   # Confidence interval multiplier for standard error
#   # Calculate t-statistic for confidence interval: 
#   # e.g., if conf.interval is .95, use .975 (above/below), and use df=N-1
#   ciMult <- qt(conf.interval/2 + .5, datac$N-1)
#   datac$ci <- datac$se * ciMult
#   
#   return(datac)
# }

pd <- position_dodge(0.1)

Alldegrees1 %>% 
  # rownames_to_column("gene_name") %>% 
  left_join(cflo.control.mods, by="gene_name") %>%
  
  # PLOT FROM RAW DATA
  mutate(module_identity = factor(module_identity, 
                                  levels = paste0("C",1:nrow(module_ids)))) %>%
  
  ggplot(aes(x=module_identity, y=kTotal)) +
  # ggplot(aes(x=module_identity, y=kWithin)) +
  
    geom_hline(yintercept = 45, col="darkgrey", alpha=0.8) +
    geom_hline(yintercept = 30, col="darkgrey", alpha=0.8) +
    # geom_hline(yintercept = 75, col="darkgrey", alpha=0.8) +
  
    geom_boxplot(fill="lightgrey",
                 alpha=0.6,
                 outlier.color = "grey60",
                 position = "dodge2") +
    
    theme_Publication() +
    scale_colour_Publication() +
    xlab("") +
    ggtitle("") +
  
    ylab("Total connectivity") +
    # ylab("Intramodular connectivity") +
  
    theme(text = element_text(size = 20, colour = 'black'),
          legend.position = "none",
          axis.line.y = element_line(colour = "transparent",size=1)) +
    coord_flip()

  # # SUMMARIZE DATA AND PLOT
  # # summarySE(., measurevar = "kWithin", groupvars = "module_identity") %>% 
  # summarySE(., measurevar = "kTotal", groupvars = "module_identity") %>% 
  # 
  # # make the value column
  # mutate(value=kTotal) %>% 
  # mutate(module_identity = factor(module_identity, 
  #                                 levels = paste0("C",1:nrow(module_ids)))) %>%
  # 
  # # Plot
  # ggplot(aes(x=((module_identity)), y=value)) +
  #   
  #   theme_Publication() +
  #   scale_colour_Publication() +
  #   xlab("") +
  #   ylab("Total connectivity") +
  #   ggtitle("") +
  #   # 
  #   # theme(axis.title.x = element_blank(),
  #   #       axis.title.y = element_blank(),
  #   #       legend.position='none',
  #   #       # legend.title = element_text(size = 15, colour = 'black'),
  #   #       legend.text = element_blank()) +
  #   # ## center align the title
  #   # theme(plot.title = element_blank()) +
  #   # 
  #   # scale_x_continuous(breaks = c(0,12,24)) +
  #   #scale_y_continuous(limits = c(0,40)) +
  #   
  #   # theme(panel.grid.major.x = element_line(colour = "#808080", size=0.1),
  #   #       panel.grid.major.y = element_line(colour = "#808080", size=0.2)) +
  #   # ## if you need highlighting parts of the graph
  #   # geom_rect(aes(xmin = 11.8, xmax = 23.8, ymin = -Inf, ymax = Inf),
  #   #           fill = "lightgrey", alpha = 0.02, color=NA) +
  #   
  #   # geom_line(position=pd,
  #   #           col="#F2CB05", size=2, alpha=1) + # total
  #   #           # col="#BF0404", size=2, alpha=1) + # FS
  #   #           # col="#0FBF67", size=2, alpha=1) + # FA
  #             
  #   
  #   ## Add error bar here
  #   geom_errorbar(aes(ymin=value-ci, ymax=value+ci), 
  #                 width=.4, position=pd, col="black", alpha = 0.7) +
  #   
  #   # Add the points on top of the error bars
  #   geom_point(position=pd, size=2.5,
  #              col="black", fill="grey60",
  #              show.legend = F, color="black", pch=21, alpha=0.9) +
  #   
  #   #facet_wrap(~Phase, nrow=2)
  #   # scale_fill_manual(values = c("black","#F20505","#F2CB05","#0FBF67")) +
  #   # scale_color_manual(values=c("#F20505","#F5D736","#0FBF67")) +
  #   theme(text = element_text(size = 20, colour = 'black'),
  #         legend.position = "none") +
  #   
  #   coord_flip()
  # 
  #   # [as per reviewer request]
  #   # forcing the y-axis to start at 0
  #   # expand_limits(x = 0, y = 0)

5.3 Betweenness centrality

Next, we can look at betweenness centrality:

# network.complete <- graph_from_adjacency_matrix(adj_matrix,
#                                               mode = "undirected",
#                                               weighted = T)
# 
# V(network.complete) %>% length()
# E(network.complete) %>% length()

Step 5: Export the results

There are several things we could output from our analyses, we decided to report the following in the supplementary file:

  • cluster identity
  • total connectivity
  • pfam annotations enriched in the module

Get pfam enrichments for each module

module_pfams <- list()

bg.genes <- dat[[1]] ## all genes used to make the network
which.test <- "pfams"

for (i in 1:length(which.labels)) {
  
  # get name of the module
  m <- which.labels[[i]]
  
  # save the enrichment results
  module_pfams[[i]] <- 
    module_genes[[m]] %>% 
    check_enrichment(.,
                     bg = dat[[1]],
                     what = which.test,
                     org = "cflo",
                     clean = T,
                     expand = T,
                     plot = F)
}
## [1] "Loading annotation file for Camponotus floridanus"
## [1] "Done."
## [1] "Testing for enrichment..."
## [1] "No enriched terms found; can't expand."
## [1] "Loading annotation file for Camponotus floridanus"
## [1] "Done."
## [1] "Testing for enrichment..."
## [1] "Loading annotation file for Camponotus floridanus"
## [1] "Done."
## [1] "Loading annotation file for Camponotus floridanus"
## [1] "Done."
## [1] "Loading annotation file for Camponotus floridanus"
## [1] "Done."
## [1] "Testing for enrichment..."
## [1] "Loading annotation file for Camponotus floridanus"
## [1] "Done."
## [1] "Testing for enrichment..."
## [1] "Loading annotation file for Camponotus floridanus"
## [1] "Done."
## [1] "Testing for enrichment..."
## [1] "Loading annotation file for Camponotus floridanus"
## [1] "Done."
## [1] "Testing for enrichment..."
## [1] "Loading annotation file for Camponotus floridanus"
## [1] "Done."
## [1] "Testing for enrichment..."
## [1] "Loading annotation file for Camponotus floridanus"
## [1] "Done."
## [1] "Testing for enrichment..."
## [1] "Loading annotation file for Camponotus floridanus"
## [1] "Done."
## [1] "Loading annotation file for Camponotus floridanus"
## [1] "Done."
## [1] "Loading annotation file for Camponotus floridanus"
## [1] "Done."
## [1] "Loading annotation file for Camponotus floridanus"
## [1] "Done."
## [1] "Loading annotation file for Camponotus floridanus"
## [1] "Done."
## [1] "Loading annotation file for Camponotus floridanus"
## [1] "Done."
## [1] "Testing for enrichment..."
## [1] "Loading annotation file for Camponotus floridanus"
## [1] "Done."
## [1] "Testing for enrichment..."
## [1] "Loading annotation file for Camponotus floridanus"
## [1] "Done."
## [1] "Loading annotation file for Camponotus floridanus"
## [1] "Done."
## [1] "Loading annotation file for Camponotus floridanus"
## [1] "Done."
## [1] "Loading annotation file for Camponotus floridanus"
## [1] "Done."
## [1] "Loading annotation file for Camponotus floridanus"
## [1] "Done."
## Now clean it up
sapply(module_pfams, nrow)
## [[1]]
## [1] 0
## 
## [[2]]
## [1] 67
## 
## [[3]]
## NULL
## 
## [[4]]
## NULL
## 
## [[5]]
## [1] 52
## 
## [[6]]
## [1] 126
## 
## [[7]]
## [1] 77
## 
## [[8]]
## [1] 538
## 
## [[9]]
## [1] 102
## 
## [[10]]
## [1] 11
## 
## [[11]]
## NULL
## 
## [[12]]
## NULL
## 
## [[13]]
## NULL
## 
## [[14]]
## NULL
## 
## [[15]]
## NULL
## 
## [[16]]
## [1] 7
## 
## [[17]]
## [1] 39
## 
## [[18]]
## NULL
## 
## [[19]]
## NULL
## 
## [[20]]
## NULL
## 
## [[21]]
## NULL
## 
## [[22]]
## NULL
c <- 0
for (i in 1:length(module_pfams)) {
  
  if(is.null(nrow(module_pfams[[i]]))) {
    paste(which.labels[[i]],"is null") %>% print()
  } else if(nrow(module_pfams[[i]])==0) {
    paste(which.labels[[i]], "is an empty tibble") %>% print()
  } else {
    
    if(c==0) {
    c <- c+1
    module.pfams <- module_pfams[[i]]
    } else {
      module.pfams <- rbind(module.pfams, module_pfams[[i]])
    }
  }
}
## [1] "C1 is an empty tibble"
## [1] "C3 is null"
## [1] "C4 is null"
## [1] "C11 is null"
## [1] "C12 is null"
## [1] "C13 is null"
## [1] "C14 is null"
## [1] "C15 is null"
## [1] "C18 is null"
## [1] "C19 is null"
## [1] "C20 is null"
## [1] "C21 is null"
## [1] "C22 is null"
## change the name of the column
module.pfams <- 
  module.pfams %>% 
  select(gene_name, enriched_in_module=annot_desc) %>% 
  filter(enriched_in_module!="no_desc")

# check the output dataframe
module.pfams %>% head()
## # A tibble: 6 x 2
## # Groups:   gene_name [6]
##   gene_name    enriched_in_module                                               
##   <chr>        <chr>                                                            
## 1 LOC105247809 BTB/POZ domain                                                   
## 2 LOC105247888 CD80-like C2-set immunoglobulin domain; Immunoglobulin domain    
## 3 LOC105247947 BTB And C-terminal Kelch; BTB/POZ domain                         
## 4 LOC105247967 Neurotransmitter-gated ion-channel ligand binding domain; Neurot…
## 5 LOC105248047 Major Facilitator Superfamily                                    
## 6 LOC105248152 Low-density lipoprotein receptor domain class A

Let’s make the file

results.gcn <-
  cflo.control.mods %>% 
  
  pull(gene_name) %>% 
  
  ## rhythmicity data
  TC7_annotator() %>% 
  select(gene_name, everything()) %>% 
  
  ## cluster identity
  left_join(cflo.control.mods, by="gene_name") %>% 
  
  ## add total connectivity data
  left_join(Alldegrees1 %>% select(gene_name,kTotal), by="gene_name") %>% 
  
  ## add the enriched pfam
  left_join(module.pfams, by="gene_name") %>% 
  
  # Geneset: "for.brain.head.rhy24.cluster1" | DRGs (disease-associated)
  mutate(rhy_brain_head_cluster1 = ifelse(gene_name %in% for.brain.head.rhy24.cluster1, "yes", "no")) %>% 
  # Geneset: "for24.nur8" | DRGs (caste-associated)
  mutate(for24_nur8 = ifelse(gene_name %in% for24.nur8, "yes", "no")) %>% 
  
  ## Geneset: DEGs
  mutate(ocflo_UP = ifelse(gene_name %in% ocflo.up, "yes", "no")) %>% 
  mutate(ocflo_DOWN = ifelse(gene_name %in% ocflo.down, "yes", "no")) %>% 
  mutate(beau_UP = ifelse(gene_name %in% beau.up, "yes", "no")) %>% 
  mutate(beau_DOWN = ifelse(gene_name %in% beau.down, "yes", "no")) %>% 
  
  ## order the columns and rows
  select(module_identity, kTotal, enriched_in_module, 
         gene_name, gene_desc = blast_annotation, 
         rhy_brain_head_cluster1, for24_nur8,
         ocflo_UP, ocflo_DOWN, beau_UP, beau_DOWN,
         everything()) %>% 
  arrange(desc(module_identity), enriched_in_module, desc(kTotal)) 
  
  
# ## export it
# write.csv(results.gcn,
#           file = paste0(path_to_repo, "/results/00_supplementary_files/07_Cflo_forager_head_GCN_results.csv"),
#           row.names = F)

Step 6: Explore modules

module: C12

This module was highly preserved during Ocflo infections but not in Beauveria-infected ants. What’s special about it?

results.gcn %>% 
  filter(module_identity=="C12") %>% 
  
  # ## summarize the results
  # group_by(inf_v_control, control_rhy24) %>% 
  # summarize(n()) %>% 
  
  ## pull rhythmic genes that are up/down-regulated
  # filter(inf_v_control=="up" & control_rhy24=="yes") %>%
  # filter(inf_v_control=="down" & control_rhy24=="yes") %>%
  
  
  ## run enrichments
  pull(gene_name) %>% 
  check_enrichment(.,
                   bg = dat[[1]],
                   org = "cflo",
                   what = "pfams",
                   clean = T,
                   expand = T) %>% 
  
  ## run stacked zplots
  pull(gene_name) %>% 
  stacked.zplot_tc7(plot.mean = T, tc5 = T) %>% 
  multi.plot(rows = 5, cols = 1)
## [1] "Loading annotation file for Camponotus floridanus"
## [1] "Done."
## [1] "Testing for enrichment..."

## TableGrob (5 x 1) "arrange": 5 grobs
##   z     cells    name           grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (2-2,1-1) arrange gtable[layout]
## 3 3 (3-3,1-1) arrange gtable[layout]
## 4 4 (4-4,1-1) arrange gtable[layout]
## 5 5 (5-5,1-1) arrange gtable[layout]

RANDOMS

R1 Plotting the daily expression of core clock, clock modulators, and clock controlled genes

cgs.18 <- c("LOC105252466")
cgs.19 <- c("LOC105250191", "LOC105256631")
cgs.21 <- c("LOC105256454", "LOC105257275", "LOC105255207", "LOC105248529",
            "LOC105253861", "LOC105257836")
cgs.22 <- c("LOC105252510", "LOC105258655", "LOC105251553", "LOC105256952",
            "LOC105252917", "LOC105249574", "LOC105259208")

cgs <- list(cgs.18, cgs.19, cgs.21, cgs.22)

plots <- list()
for (i in 1:length(cgs)){
  
plots[[i]] <-
  cgs[[i]] %>% 
  stacked.zplot_tc7(bg.alpha = 0.4) %>% 
  multi.plot(rows = 1, cols = 3)
}

# png(paste0(path_to_repo, "/results/figures/", sample.name[1],"/",
#            "clock_genes_daily_exp.png"), 
#     width = 40, height = 20, units = "cm", res = 300)
# plots %>% 
#   multi.plot(rows = 1, cols = 4)
# dev.off()